Env setupu¶

In [1]:
import scanpy as sc, anndata as ad, numpy as np, pandas as pd
from scipy import sparse
from anndata import AnnData
from pandas.api.types import CategoricalDtype
from matplotlib import pylab
import warnings
import socket
import plotly.express as px
import yaml
from sklearn.cluster import DBSCAN
import holoviews as hv
import scvelo as scv
import seaborn as sns
import os
import sys
import scanpy.external as sce
import matplotlib as plt

warnings.filterwarnings('ignore')
In [2]:
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
with open("../data/resources/rcParams.yaml") as f:
    rcParamsDict = yaml.full_load(f)
    for k in rcParamsDict["rcParams"]:
        print("{} {}".format(k,rcParamsDict["rcParams"][k]))
        plt.rcParams[k] = rcParamsDict["rcParams"][k]
    for k1 in set(list(rcParamsDict)).difference(set(["rcParams"])):
        print("{} {}".format(k1,rcParamsDict[k1]))
scanpy==1.8.0 anndata==0.8.0 umap==0.4.6 numpy==1.22.2 scipy==1.6.2 pandas==1.2.3 scikit-learn==0.24.1 statsmodels==0.13.5 python-igraph==0.9.1 louvain==0.7.0 leidenalg==0.8.3
figure.dpi 80
savefig.dpi 500
figure.figsize [10, 10]
axes.facecolor white
dotSize 20

Configure paths¶

In [3]:
outdir = "../data/output"

with open("../data/resources/iPSC_lines_map.yaml", 'r') as f:
    iPSC_lines_map = yaml.load(f, Loader=yaml.FullLoader)["lines"]
colorsmap = dict(zip([i["newName"] for i in iPSC_lines_map.values()],[i["color"] for i in iPSC_lines_map.values()]))


figDir = "./figures"
if not os.path.exists(figDir):
   # Create a new directory because it does not exist
   os.makedirs(figDir)
    
leidenOrder=["ProliferatingProgenitors","RadialGliaProgenitors","OuterRadialGliaAstrocytes","CajalR_like","Neurons","MigratingNeurons","GlutamatergicNeurons_early","GlutamatergicNeurons_late","Interneurons_GAD2","Interneurons"]
dsOrder = ['UpD50', 'DownD50', 'UpD100_1', 'UpD100_2', 'DownD100', 'UpD300', 'DownD250']
cellIDOrder = ['CTL02A', 'CTL08A', 'CTL01', 'CTL04E']

DatasetBasedirs = {
"UpD50" : "../data/Sample_S20272_157/",
"DownD50" :"../data/Sample_S20273_158/",
"UpD100_1" : "../data/Sample_S20812_258/",
"UpD100_2" : "../data/Sample_S20813_259/",
"DownD250" : "../data/Sample_S20814_260/",
"DownD100" : "../data/Sample_S31807_MET6/",
"UpD300" : "../data/Sample_S33846_C_GEX/"
}
In [4]:
adataComp = sc.read(outdir+"/adatas/adataPaga.h5ad")
In [5]:
leidenColorsMaps = dict(zip(adataComp.obs["leidenAnnotated"].cat.categories,adataComp.uns["leidenAnnotated_colors"]))
cellIDColorsMap = dict(zip(adataComp.obs["cellID_newName"].cat.categories, adataComp.uns["cellID_newName_colors"]))
In [6]:
adataComp.obs["cellID_newName"].cat.categories.tolist()
Out[6]:
['CTL01', 'CTL02A', 'CTL04E', 'CTL08A']

Compositions¶

Is genotypes distribution consistent between pre and after filtering??¶

Here we want to check

  1. Cell type distribution per dataset
  2. ID x Cell type x dataset

First ID by dataset prior to filter¶

In [7]:
aggregatedCallsDFDict = {}
for ds in list(DatasetBasedirs.keys()):
    aggregatedCall = pd.read_csv(DatasetBasedirs[ds]+"/aggregatedCall/aggregatedCall.tsv", sep = "\t").rename(columns={"Consensus":"cellID_newName"})
    aggregatedCall.index = aggregatedCall["barcode"].tolist()
    aggregatedCall["dataset"] = ds
    del aggregatedCall["barcode"]
    aggregatedCallsDFDict[ds] = aggregatedCall
    
compositions = pd.concat(list(aggregatedCallsDFDict.values())).replace({iPSC_lines_map[i]["oldName"]:iPSC_lines_map[i]["newName"] for i in list(iPSC_lines_map.keys())})
compositions = compositions[compositions.cellID_newName.isin(adataComp.obs.cellID_newName.unique().tolist())]
compositions = pd.DataFrame(compositions.groupby(["dataset","cellID_newName"]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["dataset"] = compositions["dataset"].astype("category")

fig = px.bar(compositions, x="dataset", y="number_of_cells", color="cellID_newName", 
             category_orders={"dataset":dsOrder,"cellID_newName":cellIDOrder}, height=800,width=1000, template="plotly_white",
             color_discrete_map = dict(zip(adataComp.obs["cellID_newName"].cat.categories, adataComp.uns["cellID_newName_colors"])))

fig.update_traces(marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=1)

fig.update_yaxes(autorange="reversed")

fig.show()

ID by dataset¶

In [8]:
compositions = pd.DataFrame(adataComp.obs.groupby(["dataset","cellID_newName"]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
fig = px.bar(compositions, x="dataset", y="number_of_cells", color="cellID_newName", 
             category_orders={"dataset":dsOrder,"cellID_newName":cellIDOrder}, height=800,width=1000, template="plotly_white",
             color_discrete_map = dict(zip(adataComp.obs["cellID_newName"].cat.categories, adataComp.uns["cellID_newName_colors"])))

fig.update_traces(marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=1)

fig.show()

We observe that Overall ratios between celltypes is preserved after filtering¶

Celltype overview by dataset¶

Celltype by dataset not norm¶

In [9]:
compositions = pd.DataFrame(adataComp.obs.groupby(["leidenAnnotated","dataset"]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
fig = px.bar(compositions, x="dataset", y="number_of_cells", color="leidenAnnotated", 
             category_orders={"dataset":dsOrder,
                             "leidenAnnotated":leidenOrder}, height=800,width=1000, template="plotly_white",
             color_discrete_map = dict(zip(adataComp.obs["leidenAnnotated"].cat.categories,adataComp.uns["leidenAnnotated_colors"]))
)

fig.update_traces(marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=1)

fig.show()

Celltype by dataset norm¶

In [10]:
compositions = pd.DataFrame(adataComp.obs.groupby(["leidenAnnotated","dataset"]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["number_of_cells_normBycelltype"] = compositions["number_of_cells"] / np.array(compositions.groupby("dataset")["number_of_cells"].sum()[compositions["dataset"].tolist()])

fig = px.bar(compositions, x="dataset", y="number_of_cells_normBycelltype", color="leidenAnnotated", hover_data=compositions,
             category_orders={"dataset":['UpD50', 'DownD50', 'UpD100_1', 'UpD100_2', 'DownD100', 'UpD300', 'DownD250'],
                             "leidenAnnotated":leidenOrder}, height=800,width=1000, template="plotly_white",
             color_discrete_map = dict(zip(adataComp.obs["leidenAnnotated"].cat.categories,adataComp.uns["leidenAnnotated_colors"]))
)

fig.update_traces(marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=1)

fig.show()

Celltype by stage¶

In [11]:
compositions = pd.DataFrame(adataComp.obs.groupby(["leidenAnnotated","stage"]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["cellsFraction"] = compositions["number_of_cells"] / np.array(compositions.groupby("stage")["number_of_cells"].sum()[compositions["stage"].tolist()])

fig = px.bar(compositions, x="stage", y="cellsFraction", color="leidenAnnotated", hover_data=compositions,
             category_orders={"stage":['early', 'mid', 'late'],
                             "leidenAnnotated":leidenOrder}, height=800,width=1000, template="plotly_white",
             color_discrete_map = dict(zip(adataComp.obs["leidenAnnotated"].cat.categories,adataComp.uns["leidenAnnotated_colors"]))
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)

fig.update_layout(
yaxis = dict(tickfont = dict(size=30)),
xaxis = dict(tickfont = dict(size=30)))

fig.show()

Celltype by type¶

In [12]:
compositions = pd.DataFrame(adataComp.obs.groupby(["leidenAnnotated","type"]).size())
compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
compositions["cellsFraction"] = compositions["number_of_cells"] / np.array(compositions.groupby("type")["number_of_cells"].sum()[compositions["type"].tolist()])

fig = px.bar(compositions, x="type", y="cellsFraction", color="leidenAnnotated", hover_data=compositions,
             category_orders={"type":['upstream', 'downstream'],
                             "leidenAnnotated":leidenOrder}, height=800,width=1000, template="plotly_white",
             color_discrete_map = dict(zip(adataComp.obs["leidenAnnotated"].cat.categories,adataComp.uns["leidenAnnotated_colors"]))
)

fig.update_traces(marker_line_color='black',
                  marker_line_width=1, opacity=1)

fig.update_layout(
yaxis = dict(tickfont = dict(size=30)),
xaxis = dict(tickfont = dict(size=30)))

fig.show()

Celltype by stage divided by paradigm¶

In [13]:
for paradigm in adataComp.obs.type.unique():
    adataCompStage = adataComp[adataComp.obs["type"] == paradigm]
    compositions = pd.DataFrame(adataCompStage.obs.groupby(["leidenAnnotated","stage"]).size())
    compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
    compositions["cells_fraction"] = compositions["number_of_cells"] / np.array(compositions.groupby("stage")["number_of_cells"].sum()[compositions["stage"].tolist()])

    fig = px.bar(compositions, x="stage", y="cells_fraction", color="leidenAnnotated", hover_data=compositions,
                 category_orders={"stage":['early', 'mid', 'late'],
                                 "leidenAnnotated":leidenOrder}, height=800,width=1000, template="plotly_white",title=paradigm,
                 color_discrete_map = dict(zip(adataCompStage.obs["leidenAnnotated"].cat.categories,adataCompStage.uns["leidenAnnotated_colors"]))
    )

    fig.update_traces(marker_line_color='black',
                      marker_line_width=1, opacity=1)

    fig.update_layout(
    yaxis = dict(tickfont = dict(size=30)),
    xaxis = dict(tickfont = dict(size=30)))

    fig.write_image(figDir+"/Compositions.CelltypeByStage."+paradigm+".pdf")
    fig.show()

Are cellt types equally distributed among genotypes?¶

ID by celltype by dataset (norm)¶

In [14]:
compositions
Out[14]:
leidenAnnotated stage number_of_cells cells_fraction
0 CajalR_like early 19 0.006434
1 CajalR_like mid 398 0.183834
2 CajalR_like late 1 0.000224
3 GlutamatergicNeurons_early early 510 0.172706
4 GlutamatergicNeurons_early mid 264 0.121940
5 GlutamatergicNeurons_early late 84 0.018800
6 GlutamatergicNeurons_late early 14 0.004741
7 GlutamatergicNeurons_late mid 31 0.014319
8 GlutamatergicNeurons_late late 1630 0.364816
9 Interneurons early 1 0.000339
10 Interneurons mid 2 0.000924
11 Interneurons late 293 0.065577
12 Interneurons_GAD2 early 17 0.005757
13 Interneurons_GAD2 mid 577 0.266513
14 Interneurons_GAD2 late 2 0.000448
15 MigratingNeurons early 740 0.250593
16 MigratingNeurons mid 294 0.135797
17 MigratingNeurons late 2 0.000448
18 Neurons early 1098 0.371825
19 Neurons mid 241 0.111316
20 Neurons late 1018 0.227842
21 OuterRadialGliaAstrocytes early 1 0.000339
22 OuterRadialGliaAstrocytes mid 47 0.021709
23 OuterRadialGliaAstrocytes late 379 0.084825
24 ProliferatingProgenitors early 268 0.090755
25 ProliferatingProgenitors mid 167 0.077136
26 ProliferatingProgenitors late 649 0.145255
27 RadialGliaProgenitors early 285 0.096512
28 RadialGliaProgenitors mid 144 0.066513
29 RadialGliaProgenitors late 410 0.091764
In [15]:
for ds in dsOrder:
    
    adataCompDS = adataComp[adataComp.obs["dataset"] == ds]
    
    compositions = pd.DataFrame(adataCompDS.obs.groupby(["leidenAnnotated","cellID_newName"]).size())
    compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
    compositions["number_of_cells_normBycelltype"] = compositions["number_of_cells"] / np.array(compositions.groupby("leidenAnnotated")["number_of_cells"].sum()[compositions["leidenAnnotated"].tolist()])
    
    #Keep only celltypes with at least n total cells regardless ID
    totPertype = compositions.groupby("leidenAnnotated")["number_of_cells"].sum()
    LeidenSelection = totPertype[totPertype / totPertype.sum() > .05].index.tolist()    
    compositions = compositions[compositions.leidenAnnotated.isin(LeidenSelection)]
    
    fig = px.bar(compositions, x="leidenAnnotated", y="number_of_cells_normBycelltype", color="cellID_newName", 
             category_orders={"dataset":["early", "mid", "late",],"leidenAnnotated":[l for l in leidenOrder if l in compositions["leidenAnnotated"].unique()]}, height=800,width=1000, title=ds,template="plotly_white",
             color_discrete_map = dict(zip(adataCompDS.obs["cellID_newName"].cat.categories, adataCompDS.uns["cellID_newName_colors"])))

    fig.update_traces(marker_line_color='rgb(8,48,107)',width=.97,
                  marker_line_width=1.5, opacity=1)
    
    fig.show()

ID by celltype by dataset (not norm)¶

In [16]:
adataComp.obs.groupby(["leidenAnnotated","cellID_newName"]).size().max()
Out[16]:
1722
In [17]:
threshold=0
for ds in dsOrder:
    
    adataCompDS = adataComp[adataComp.obs["dataset"] == ds]
    
    compositions = pd.DataFrame(adataCompDS.obs.groupby(["leidenAnnotated","cellID_newName"]).size())
    compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
    compositions["number_of_cells_normBycelltype"] = compositions["number_of_cells"] / np.array(compositions.groupby("leidenAnnotated")["number_of_cells"].sum()[compositions["leidenAnnotated"].tolist()])
    
    #Keep only celltypes with at least n total cells regardless ID
    totPertype = compositions.groupby("leidenAnnotated")["number_of_cells"].sum()
    LeidenSelection = totPertype[totPertype / totPertype.sum() > .05].index.tolist()    
    compositions = compositions[compositions.leidenAnnotated.isin(LeidenSelection)]
    
    fig = px.bar(compositions, x="leidenAnnotated", y="number_of_cells", color="cellID_newName", 
             category_orders={"dataset":["early", "mid", "late",],"leidenAnnotated":[l for l in leidenOrder if l in compositions["leidenAnnotated"].unique()]}, height=800,width=1000, title=ds,template="plotly_white",
             color_discrete_map = dict(zip(adataCompDS.obs["cellID_newName"].cat.categories, adataCompDS.uns["cellID_newName_colors"])))

    fig.update_traces(marker_line_color='rgb(8,48,107)',width=.97,
                  marker_line_width=1.5, opacity=1)
    fig.update_layout(yaxis_range=[0,adataComp.obs.groupby(["leidenAnnotated","cellID_newName"]).size().max()])


    fig.show()

No evident cell-type byas per genotype is highlighted in chimeric models with respect to their no chimeric counterpart¶

Same as above but looking at compositions within genotype¶

In [18]:
totPertype
Out[18]:
leidenAnnotated
CajalR_like                     3
GlutamatergicNeurons_early     13
GlutamatergicNeurons_late     204
Interneurons                  805
Interneurons_GAD2               1
Neurons                       149
OuterRadialGliaAstrocytes     207
ProliferatingProgenitors      182
RadialGliaProgenitors         207
Name: number_of_cells, dtype: int64
In [19]:
for ds in dsOrder:
    
    adataCompDS = adataComp[adataComp.obs["dataset"] == ds]
    
    compositions = pd.DataFrame(adataCompDS.obs.groupby(["cellID_newName","leidenAnnotated"]).size())
    compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
    
    #Keep only celltypes with at least n total cells regardless ID
    totPertype = compositions.groupby("leidenAnnotated")["number_of_cells"].sum()
    LeidenSelection = totPertype[totPertype / totPertype.sum() > .05].index.tolist()    
    compositions = compositions[compositions.leidenAnnotated.isin(LeidenSelection)]

    
    #Keep only IDs with at least 100 cells by dataset
    totPertype = compositions.groupby("cellID_newName")["number_of_cells"].sum()
    IDSelection = totPertype[totPertype <= 30].index.tolist()    
    
    compositions.loc[compositions.cellID_newName.isin(IDSelection),"number_of_cells"] = 0
    compositions
    
    
    compositions["number_of_cells_normBygenotype"] = compositions["number_of_cells"] / np.array(compositions.groupby("cellID_newName")["number_of_cells"].sum()[compositions["cellID_newName"].tolist()])


    lines = [l for l in adataComp.obs["cellID_newName"].cat.categories.tolist() if l in adataCompDS.obs["cellID_newName"].cat.categories.tolist()]
    fig = px.bar(compositions, x="cellID_newName", y="number_of_cells_normBygenotype", color="leidenAnnotated", 
             category_orders={"cellID_newName":lines}, 
                 height=800,width=1000, title=ds,template="plotly_white",
             color_discrete_map = leidenColorsMaps)
    
    
    x_labels = lines
    totals = compositions.groupby("cellID_newName")["number_of_cells"].sum()[lines]
    total_labels = [{"x": x, "y": 1.1, "text": str(total), "showarrow": False} for x, total in zip(x_labels, totals)]


    fig.update_traces(marker_line_color='rgb(0,0,0)',width=.97,
                  marker_line_width=1.5, opacity=1)

    fig.update_layout(annotations=total_labels)
    
    fig.show()
In [20]:
threshold=0
for ds in dsOrder:
    
    adataCompDS = adataComp[adataComp.obs["dataset"] == ds]
    
    compositions = pd.DataFrame(adataCompDS.obs.groupby(["leidenAnnotated","cellID_newName"]).size())
    compositions = compositions.reset_index().rename(columns={0:"number_of_cells"})
    compositions["number_of_cells_normBycelltype"] = compositions["number_of_cells"] / np.array(compositions.groupby("leidenAnnotated")["number_of_cells"].sum()[compositions["leidenAnnotated"].tolist()])
    
    #Keep only celltypes with at least n total cells regardless ID
    totPertype = compositions.groupby("leidenAnnotated")["number_of_cells"].sum()
    LeidenSelection = totPertype[totPertype / totPertype.sum() > .05].index.tolist()    
    compositions = compositions[compositions.leidenAnnotated.isin(LeidenSelection)]
    
    fig = px.bar(compositions, x="leidenAnnotated", y="number_of_cells", color="cellID_newName", 
             category_orders={"dataset":["early", "mid", "late",],"leidenAnnotated":[l for l in leidenOrder if l in compositions["leidenAnnotated"].unique()]}, height=800,width=1000, title=ds,template="plotly_white",
             color_discrete_map = dict(zip(adataCompDS.obs["cellID_newName"].cat.categories, adataCompDS.uns["cellID_newName_colors"])))

    fig.update_traces(marker_line_color='rgb(8,48,107)',width=.97,
                  marker_line_width=1.5, opacity=1)
    fig.update_layout(yaxis_range=[0,adataComp.obs.groupby(["leidenAnnotated","cellID_newName"]).size().max()])


    fig.show()

MILO mosaic vs downstream¶

In [21]:
import anndata2ri
import rpy2.rinterface_lib.callbacks
import logging
In [22]:
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
In [23]:
anndata2ri.activate()
In [24]:
%load_ext rpy2.ipython
In [25]:
%%R
library(miloR)
library(igraph)
library(ggplot2)
In [26]:
#Adata
cov = "type"
adataMilo = sc.read_h5ad(outdir+"/adatas/ClusterAnnotated_Base_filt.h5ad")
adataCMB = adataMilo.copy()
adata_no_knn = adataCMB.copy()
adata_no_knn.obsp = None
adata_no_knn.uns.pop("neighbors")
del adata_no_knn.uns
del adata_no_knn.varm
del adata_no_knn.obsm
#adata_no_knn.uns.pop("nhoods")
#adata_no_knn.X = adata_no_knn.X.todense()
adata_no_knn.obsm["X_pca"] = adataCMB.obsm["X_pca"].copy()
adata_no_knn.obsm["X_umap"] = adataCMB.obsm["X_umap"].copy()
adata_no_knn.obsm["X_draw_graph_fa"] = adataCMB.obsm["X_draw_graph_fa"].copy()


levelsOrder = adataCMB.obs["stage"].unique().tolist()
levelsOrder
Out[26]:
['late', 'mid', 'early']
In [27]:
# DesignMAtrix
design_df = adataCMB.obs[["dataset",cov]]
design_df.drop_duplicates(inplace=True)
design_df.index = design_df['dataset']
design_df
Out[27]:
dataset type
dataset
DownD250 DownD250 downstream
UpD300 UpD300 upstream
DownD100 DownD100 downstream
UpD100_1 UpD100_1 upstream
UpD100_2 UpD100_2 upstream
DownD50 DownD50 downstream
UpD50 UpD50 upstream
In [28]:
%%R -i adata_no_knn

milo <- Milo(adata_no_knn)
milo <- buildGraph(milo, k=20, d=30)
milo
class: Milo 
dim: 3499 14913 
metadata(0):
assays(1): X
rownames(3499): LINC00115 AL645608.7 ... MT-ND6 MT-CYB
rowData names(3): highly_variable mean std
colnames(14913): AAACCTGAGAGACTAT-1_DownD250
  AAACCTGCATGGTTGT-1_DownD250 ... TTTGTCATCGCGTAGC-1_UpD50
  TTTGTCATCGTTTATC-1_UpD50
colData names(21): dataset cellID ... phase leidenAnnotated
reducedDimNames(3): PCA UMAP X_draw_graph_fa
altExpNames(0):
nhoods dimensions(2): 1 1
nhoodCounts dimensions(2): 1 1
nhoodDistances dimension(1): 0
graph names(1): graph
nhoodIndex names(1): 0
nhoodExpression dimension(2): 1 1
nhoodReducedDim names(0):
nhoodGraph names(0):
nhoodAdjacency dimension(2): 1 1
In [29]:
%%R -i design_df -i levelsOrder -o DA_results


milo <- makeNhoods(milo, prop = 0.1, k = 20, d=20, refined = TRUE)

milo <- countCells(milo, meta.data = data.frame(colData(milo)), sample="dataset")


milo <- calcNhoodDistance(milo, d=20)


DA_results <- testNhoods(milo, design = ~ type, design.df = design_df)
In [30]:
DA_results
Out[30]:
logFC logCPM F PValue FDR Nhood SpatialFDR
1 -1.870903 9.573516 1.169554 0.290096 0.997542 1.0 0.996881
2 -0.224436 9.972568 0.027922 0.868673 0.997542 2.0 0.996881
3 -4.413693 9.813314 4.726022 0.040578 0.997542 3.0 0.996881
4 -0.779635 10.120266 0.296486 0.591050 0.997542 4.0 0.996881
5 -0.626142 10.603573 0.154783 0.697433 0.997542 5.0 0.996881
... ... ... ... ... ... ... ...
1123 0.172820 9.422320 0.014236 0.905994 0.997542 1123.0 0.996881
1124 -0.271387 9.884761 0.038866 0.845352 0.997542 1124.0 0.996881
1125 -0.201787 9.607345 0.018218 0.893742 0.997542 1125.0 0.996881
1126 -0.324497 9.585365 0.052572 0.820485 0.997542 1126.0 0.996881
1127 -0.378578 10.111251 0.063964 0.802462 0.997542 1127.0 0.996881

1127 rows × 7 columns

In [31]:
%%R
milo <- buildNhoodGraph(milo)
In [32]:
%%R
milo
class: Milo 
dim: 3499 14913 
metadata(0):
assays(1): X
rownames(3499): LINC00115 AL645608.7 ... MT-ND6 MT-CYB
rowData names(3): highly_variable mean std
colnames(14913): AAACCTGAGAGACTAT-1_DownD250
  AAACCTGCATGGTTGT-1_DownD250 ... TTTGTCATCGCGTAGC-1_UpD50
  TTTGTCATCGTTTATC-1_UpD50
colData names(21): dataset cellID ... phase leidenAnnotated
reducedDimNames(3): PCA UMAP X_draw_graph_fa
altExpNames(0):
nhoods dimensions(2): 14913 1127
nhoodCounts dimensions(2): 1127 7
nhoodDistances dimension(1): 1127
graph names(1): graph
nhoodIndex names(1): 1127
nhoodExpression dimension(2): 1 1
nhoodReducedDim names(0):
nhoodGraph names(1): nhoodGraph
nhoodAdjacency dimension(2): 1127 1127
In [33]:
%%R -w 1000 -h 800 -r 100

plotNhoodGraphDA(milo, DA_results,  alpha=.1,size_range=c(2,3), layout="X_draw_graph_fa")+
  scale_fill_gradient2(high='#be1622', mid='white', low="#534ca3", name="logFC")+ theme(
  panel.background = element_rect(fill = "transparent",
                                  colour = NA_character_), # necessary to avoid drawing panel outline
  panel.grid.major = element_blank(), # get rid of major grid
  panel.grid.minor = element_blank(), # get rid of minor grid
  plot.background = element_rect(fill = "transparent",
                                 colour = NA_character_), # necessary to avoid drawing plot outline
  legend.background = element_rect(fill = "transparent"),
)
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
In [34]:
%%R -i figDir
ggsave(
  paste0(figDir,"/DA.logFC.upstream_vs_downstream.png"),
  plot = last_plot(),
  device = "png",
  scale = 1,bg = "transparent",
  width = 2200,
  height = 1640,
  units = c("px"),
  dpi = 300)

MILO mod: Anova like Stage wise vs all¶

In [35]:
import anndata2ri
import rpy2.rinterface_lib.callbacks
import logging
In [36]:
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)
In [37]:
anndata2ri.activate()
In [38]:
%load_ext rpy2.ipython
The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython
In [39]:
%%R
library(miloR)
library(igraph)
library(ggplot2)
library(ggplot2)
In [40]:
#Adata
cov = "stage"
adataMilo = sc.read_h5ad(outdir+"/adatas/ClusterAnnotated_Base_filt.h5ad")
adataCMB = adataMilo.copy()
adata_no_knn = adataCMB.copy()
adata_no_knn.obsp = None
adata_no_knn.uns.pop("neighbors")
del adata_no_knn.uns
del adata_no_knn.varm
del adata_no_knn.obsm
#adata_no_knn.uns.pop("nhoods")
#adata_no_knn.X = adata_no_knn.X.todense()
adata_no_knn.obsm["X_pca"] = adataCMB.obsm["X_pca"].copy()
adata_no_knn.obsm["X_umap"] = adataCMB.obsm["X_umap"].copy()
adata_no_knn.obsm["X_draw_graph_fa"] = adataCMB.obsm["X_draw_graph_fa"].copy()


levelsOrder = adataCMB.obs["stage"].unique().tolist()
levelsOrder
Out[40]:
['late', 'mid', 'early']
In [41]:
# DesignMAtrix
design_df = adataCMB.obs[["dataset","stage"]]
design_df.drop_duplicates(inplace=True)
design_df.index = design_df['dataset']
design_df
Out[41]:
dataset stage
dataset
DownD250 DownD250 late
UpD300 UpD300 late
DownD100 DownD100 mid
UpD100_1 UpD100_1 mid
UpD100_2 UpD100_2 mid
DownD50 DownD50 early
UpD50 UpD50 early
In [42]:
%%R -i adata_no_knn

milo <- Milo(adata_no_knn)
milo <- buildGraph(milo, k=20, d=30)
milo
class: Milo 
dim: 3499 14913 
metadata(0):
assays(1): X
rownames(3499): LINC00115 AL645608.7 ... MT-ND6 MT-CYB
rowData names(3): highly_variable mean std
colnames(14913): AAACCTGAGAGACTAT-1_DownD250
  AAACCTGCATGGTTGT-1_DownD250 ... TTTGTCATCGCGTAGC-1_UpD50
  TTTGTCATCGTTTATC-1_UpD50
colData names(21): dataset cellID ... phase leidenAnnotated
reducedDimNames(3): PCA UMAP X_draw_graph_fa
altExpNames(0):
nhoods dimensions(2): 1 1
nhoodCounts dimensions(2): 1 1
nhoodDistances dimension(1): 0
graph names(1): graph
nhoodIndex names(1): 0
nhoodExpression dimension(2): 1 1
nhoodReducedDim names(0):
nhoodGraph names(0):
nhoodAdjacency dimension(2): 1 1
In [43]:
%%R

#' @export
#' @importFrom stats model.matrix
#' @importFrom Matrix colSums rowMeans
#' @importFrom stats dist median
#' @importFrom limma makeContrasts
#' @importFrom edgeR DGEList estimateDisp glmQLFit glmQLFTest topTags calcNormFactors


.check_empty <- function(x, attribute){
    # check if a Milo object slot is empty or not
    x.slot <- slot(x, attribute)

    if(is.list(x.slot) & names(slot(x, "graph")) == "graph"){
        return(length(x.slot[[1]]) > 0)
    } else if(is.list(x.slot) & is.null(names(x.slot))){
        return(length(x.slot))
    } else if(any(class(x.slot) %in% c("dgCMatrix", "dsCMatrix", "ddiMatrix", "matrix"))){
        return(sum(rowSums(x.slot)) == 0)
    }
}





testNhoods_mod <- function(x, design, design.df,
                       fdr.weighting=c("k-distance", "neighbour-distance", "max", "graph-overlap", "none"),
                       min.mean=0, model.contrasts=NULL, robust=TRUE, reduced.dim="PCA",design.coef=NULL, design.mcontrast=NULL, 
                       norm.method=c("TMM", "RLE", "logMS")){
    if(is(design, "formula")){
        model <- model.matrix(design, data=design.df)
        rownames(model) <- rownames(design.df)
    } else if(is(design, "matrix")){
        model <- design
        if(nrow(model) != nrow(design.df)){
            stop("Design matrix and model matrix are not the same dimensionality")
        }

        if(any(rownames(model) != rownames(design.df))){
            warning("Design matrix and model matrix dimnames are not the same")
            # check if rownames are a subset of the design.df
            check.names <- any(rownames(model) %in% rownames(design.df))
            if(isTRUE(check.names)){
                rownames(model) <- rownames(design.df)
            } else{
                stop("Design matrix and model matrix rownames are not a subset")
            }
        }
    }

    if(!is(x, "Milo")){
        stop("Unrecognised input type - must be of class Milo")
    } else if(.check_empty(x, "nhoodCounts")){
        stop("Neighbourhood counts missing - please run countCells first")
    }

    if(!any(norm.method %in% c("TMM", "logMS", "RLE"))){
        stop("Normalisation method ", norm.method, " not recognised. Must be either TMM, RLE or logMS")
    }

    if(!reduced.dim %in% reducedDimNames(x)){
        stop(reduced.dim, " is not found in reducedDimNames. Avaiable options are ", paste(reducedDimNames(x), collapse=","))
    }

    subset.counts <- FALSE
    if(ncol(nhoodCounts(x)) != nrow(model)){
        # need to allow for design.df with a subset of samples only
        if(all(rownames(model) %in% colnames(nhoodCounts(x)))){
            message("Design matrix is a strict subset of the nhood counts")
            subset.counts <- TRUE
        } else{
            stop("Design matrix (", nrow(model), ") and nhood counts (",
                 ncol(nhoodCounts(x)), ") are not the same dimension")
        }
    }

    # assume nhoodCounts and model are in the same order
    # cast as DGEList doesn't accept sparse matrices
    # what is the cost of cast a matrix that is already dense vs. testing it's class
    if(min.mean > 0){
        if(isTRUE(subset.counts)){
            keep.nh <- rowMeans(nhoodCounts(x)[, rownames(model)]) >= min.mean
        } else{
            keep.nh <- rowMeans(nhoodCounts(x)) >= min.mean
        }
    } else{
        if(isTRUE(subset.counts)){
            keep.nh <- rep(TRUE, nrow(nhoodCounts(x)[, rownames(model)]))
        }else{
            keep.nh <- rep(TRUE, nrow(nhoodCounts(x)))
        }
    }

    if(isTRUE(subset.counts)){
        keep.samps <- intersect(rownames(model), colnames(nhoodCounts(x)[keep.nh, ]))
    } else{
        keep.samps <- colnames(nhoodCounts(x)[keep.nh, ])
    }

    if(any(colnames(nhoodCounts(x)[keep.nh, keep.samps]) != rownames(model)) & !any(colnames(nhoodCounts(x)[keep.nh, keep.samps]) %in% rownames(model))){
        stop("Sample names in design matrix and nhood counts are not matched.
             Set appropriate rownames in design matrix.")
    } else if(any(colnames(nhoodCounts(x)[keep.nh, keep.samps]) != rownames(model)) & any(colnames(nhoodCounts(x)[keep.nh, keep.samps]) %in% rownames(model))){
        warning("Sample names in design matrix and nhood counts are not matched. Reordering")
        model <- model[colnames(nhoodCounts(x)[keep.nh, keep.samps]), ]
    }

    if(length(norm.method) > 1){
        message("Using TMM normalisation")
        dge <- DGEList(counts=nhoodCounts(x)[keep.nh, keep.samps],
                       lib.size=colSums(nhoodCounts(x)[keep.nh, keep.samps]))
        dge <- calcNormFactors(dge, method="TMM")
    } else if(norm.method %in% c("TMM")){
        message("Using TMM normalisation")
        dge <- DGEList(counts=nhoodCounts(x)[keep.nh, keep.samps],
                       lib.size=colSums(nhoodCounts(x)[keep.nh, keep.samps]))
        dge <- calcNormFactors(dge, method="TMM")
    } else if(norm.method %in% c("RLE")){
        message("Using RLE normalisation")
        dge <- DGEList(counts=nhoodCounts(x)[keep.nh, keep.samps],
                       lib.size=colSums(nhoodCounts(x)[keep.nh, keep.samps]))
        dge <- calcNormFactors(dge, method="RLE")
    }else if(norm.method %in% c("logMS")){
        message("Using logMS normalisation")
        dge <- DGEList(counts=nhoodCounts(x)[keep.nh, keep.samps],
                       lib.size=colSums(nhoodCounts(x)[keep.nh, keep.samps]))
    }
    print(model)
    dge <- estimateDisp(dge, model)
    fit <- glmQLFit(dge, model, robust=robust)
    if(!is.null(model.contrasts)){
        mod.constrast <- makeContrasts(contrasts=model.contrasts, levels=model)
        res <- as.data.frame(topTags(glmQLFTest(fit, contrast=mod.constrast),
                                     sort.by='none', n=Inf))
    } else if (!is.null(design.coef)) {
        n.coef <- design.coef
        res <- as.data.frame(topTags(glmQLFTest(fit, coef=n.coef), sort.by='none', n=Inf))
    } else if (!is.null(design.mcontrast)) {
        res <- as.data.frame(topTags(glmQLFTest(fit, contrast=design.mcontrast), sort.by='none', n=Inf))
    } else{
        n.coef <- ncol(model)
        res <- as.data.frame(topTags(glmQLFTest(fit, coef=n.coef), sort.by='none', n=Inf))
    }
             

    res$Nhood <- as.numeric(rownames(res))
    message("Performing spatial FDR correction with", fdr.weighting[1], " weighting")
    mod.spatialfdr <- graphSpatialFDR(x.nhoods=nhoods(x),
                                      graph=graph(x),
                                      weighting=fdr.weighting,
                                      k=x@.k,
                                      pvalues=res[order(res$Nhood), ]$PValue,
                                      indices=nhoodIndex(x),
                                      distances=nhoodDistances(x),
                                      reduced.dimensions=reducedDim(x, reduced.dim))

    res$SpatialFDR[order(res$Nhood)] <- mod.spatialfdr
    res
}
In [44]:
%%R -i design_df -i levelsOrder -o DA_results


milo <- makeNhoods(milo, prop = 0.1, k = 20, d=20, refined = TRUE)

milo <- countCells(milo, meta.data = data.frame(colData(milo)), sample="dataset")


milo <- calcNhoodDistance(milo, d=20)

model <- model.matrix(~0+stage, data=design_df)
rownames(model) <- rownames(design_df)
print(model)

contrasts <- makeContrasts("late_vs_all" = stagelate-(stagemid+stageearly)/2,
                           "mid_vs_all" = stagemid-(stageearly+stagelate)/2,
                           "early_vs_all" = stageearly-(stagelate+stagemid)/2,
                           levels = model)

print(contrasts)
DA_results <- testNhoods_mod(milo, design = ~0+stage, design.df = design_df, design.mcontrast=contrasts)
         stageearly stagemid stagelate
DownD250          0        0         1
UpD300            0        0         1
DownD100          0        1         0
UpD100_1          0        1         0
UpD100_2          0        1         0
DownD50           1        0         0
UpD50             1        0         0
attr(,"assign")
[1] 1 1 1
attr(,"contrasts")
attr(,"contrasts")$stage
[1] "contr.poly"

            Contrasts
Levels       late_vs_all mid_vs_all early_vs_all
  stageearly        -0.5       -0.5          1.0
  stagemid          -0.5        1.0         -0.5
  stagelate          1.0       -0.5         -0.5
         stageearly stagemid stagelate
UpD50             1        0         0
DownD50           1        0         0
UpD100_1          0        1         0
UpD100_2          0        1         0
DownD100          0        1         0
UpD300            0        0         1
DownD250          0        0         1
In [45]:
DA_results
Out[45]:
logFC.late_vs_all logFC.mid_vs_all logFC.early_vs_all logCPM F PValue FDR Nhood SpatialFDR
1 -3.584693 7.169386 -3.584693 10.026001 11.723229 8.347220e-06 4.455427e-05 1.0 4.686394e-05
2 1.693425 -0.214222 -1.479203 9.756943 1.004361 3.663587e-01 4.068696e-01 2.0 4.013494e-01
3 -4.778575 7.042961 -2.264386 10.646986 15.052969 3.080132e-07 2.868994e-06 3.0 3.033615e-06
4 4.056163 -1.090696 -2.965467 9.913543 5.479169 4.199957e-03 8.616252e-03 4.0 8.543189e-03
5 -0.583489 -0.277572 0.861062 9.961791 0.289668 7.485256e-01 7.767718e-01 5.0 7.702682e-01
... ... ... ... ... ... ... ... ... ...
1151 4.626752 0.467153 -5.093905 9.490436 4.563711 1.047034e-02 1.821272e-02 1151.0 1.801695e-02
1152 -1.574993 2.700998 -1.126005 10.079177 3.111573 4.462419e-02 6.179968e-02 1152.0 6.105845e-02
1153 -5.505421 1.897136 3.608284 9.520438 2.910493 5.454873e-02 7.333277e-02 1153.0 7.241432e-02
1154 8.137770 -4.068885 -4.068885 10.506148 17.108847 4.135777e-08 5.755208e-07 1154.0 6.283388e-07
1155 -1.981460 0.814385 1.167075 10.155711 1.257606 2.844313e-01 3.227094e-01 1155.0 3.185263e-01

1155 rows × 9 columns

In [46]:
%%R
milo <- buildNhoodGraph(milo)
In [47]:
%%R
milo
class: Milo 
dim: 3499 14913 
metadata(0):
assays(1): X
rownames(3499): LINC00115 AL645608.7 ... MT-ND6 MT-CYB
rowData names(3): highly_variable mean std
colnames(14913): AAACCTGAGAGACTAT-1_DownD250
  AAACCTGCATGGTTGT-1_DownD250 ... TTTGTCATCGCGTAGC-1_UpD50
  TTTGTCATCGTTTATC-1_UpD50
colData names(21): dataset cellID ... phase leidenAnnotated
reducedDimNames(3): PCA UMAP X_draw_graph_fa
altExpNames(0):
nhoods dimensions(2): 14913 1155
nhoodCounts dimensions(2): 1155 7
nhoodDistances dimension(1): 1155
graph names(1): graph
nhoodIndex names(1): 1155
nhoodExpression dimension(2): 1 1
nhoodReducedDim names(0):
nhoodGraph names(1): nhoodGraph
nhoodAdjacency dimension(2): 1155 1155
In [48]:
%%R -w 1000 -h 800 -r 100

plotNhoodGraphDA(milo, DA_results,  alpha=.1,size_range=c(2,3),res_column="logFC.late_vs_all", layout="X_draw_graph_fa")+
  scale_fill_gradient2(high='#be1622', mid='white', low="#534ca3", name="logFC")+ theme(
  panel.background = element_rect(fill = "transparent",
                                  colour = NA_character_), # necessary to avoid drawing panel outline
  panel.grid.major = element_blank(), # get rid of major grid
  panel.grid.minor = element_blank(), # get rid of minor grid
  plot.background = element_rect(fill = "transparent",
                                 colour = NA_character_), # necessary to avoid drawing plot outline
  legend.background = element_rect(fill = "transparent"),
)
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
In [49]:
%%R
ggsave(
  paste0(figDir,"/DA.logFC.late_vs_all.png"),
  plot = last_plot(),
  device = "png",
  scale = 1,bg = "transparent",
  width = 2200,
  height = 1640,
  units = c("px"),
  dpi = 300)
In [50]:
%%R -w 1000 -h 800 -r 100


plotNhoodGraphDA(milo, DA_results,  alpha=.1,size_range=c(2,3),res_column="logFC.early_vs_all", layout="X_draw_graph_fa")+
  scale_fill_gradient2(high='#be1622', mid='white', low="#534ca3", name="logFC")+ theme(
  panel.background = element_rect(fill = "transparent",
                                  colour = NA_character_), # necessary to avoid drawing panel outline
  panel.grid.major = element_blank(), # get rid of major grid
  panel.grid.minor = element_blank(), # get rid of minor grid
  plot.background = element_rect(fill = "transparent",
                                 colour = NA_character_), # necessary to avoid drawing plot outline
  legend.background = element_rect(fill = "transparent"),
)
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
In [51]:
%%R

ggsave(
    paste0(figDir,"/DA.logFC.early_vs_all.png"),
  plot = last_plot(),
  device = "png",
  scale = 1,bg = "transparent",
  width = 2200,
  height = 1640,
  units = c("px"),
  dpi = 300)
In [52]:
%%R -w 1000 -h 800 -r 100

plotNhoodGraphDA(milo, DA_results,  alpha=.1,size_range=c(2,3),res_column="logFC.mid_vs_all", layout="X_draw_graph_fa")+
  scale_fill_gradient2(high='#be1622', mid='white', low="#534ca3", name="logFC")+ theme(
  panel.background = element_rect(fill = "transparent",
                                  colour = NA_character_), # necessary to avoid drawing panel outline
  panel.grid.major = element_blank(), # get rid of major grid
  panel.grid.minor = element_blank(), # get rid of minor grid
  plot.background = element_rect(fill = "transparent",
                                 colour = NA_character_), # necessary to avoid drawing plot outline
  legend.background = element_rect(fill = "transparent"),
)
Scale for fill is already present.
Adding another scale for fill, which will replace the existing scale.
In [53]:
%%R


ggsave(
        paste0(figDir,"/DA.logFC.mid_vs_all.png"),
  plot = last_plot(),
  device = "png",
  scale = 1,bg = "transparent",
  width = 2200,
  height = 1640,
  units = c("px"),
  dpi = 300)
In [54]:
%%R -o nhoods
nhoods <- milo@nhoods
In [55]:
DA_results
Out[55]:
logFC.late_vs_all logFC.mid_vs_all logFC.early_vs_all logCPM F PValue FDR Nhood SpatialFDR
1 -3.584693 7.169386 -3.584693 10.026001 11.723229 8.347220e-06 4.455427e-05 1.0 4.686394e-05
2 1.693425 -0.214222 -1.479203 9.756943 1.004361 3.663587e-01 4.068696e-01 2.0 4.013494e-01
3 -4.778575 7.042961 -2.264386 10.646986 15.052969 3.080132e-07 2.868994e-06 3.0 3.033615e-06
4 4.056163 -1.090696 -2.965467 9.913543 5.479169 4.199957e-03 8.616252e-03 4.0 8.543189e-03
5 -0.583489 -0.277572 0.861062 9.961791 0.289668 7.485256e-01 7.767718e-01 5.0 7.702682e-01
... ... ... ... ... ... ... ... ... ...
1151 4.626752 0.467153 -5.093905 9.490436 4.563711 1.047034e-02 1.821272e-02 1151.0 1.801695e-02
1152 -1.574993 2.700998 -1.126005 10.079177 3.111573 4.462419e-02 6.179968e-02 1152.0 6.105845e-02
1153 -5.505421 1.897136 3.608284 9.520438 2.910493 5.454873e-02 7.333277e-02 1153.0 7.241432e-02
1154 8.137770 -4.068885 -4.068885 10.506148 17.108847 4.135777e-08 5.755208e-07 1154.0 6.283388e-07
1155 -1.981460 0.814385 1.167075 10.155711 1.257606 2.844313e-01 3.227094e-01 1155.0 3.185263e-01

1155 rows × 9 columns

Retrieve cells ID belonging to nhoods with >= 2logfc & <= 0.01 FDR per stage¶

In [56]:
FDRthreshold = 0.005
FCthrehsold = 3

for contrast in [col.split(".")[1] for col in  DA_results.columns if "logFC" in col]:
    Neighbmask = (DA_results["logFC."+contrast] > FCthrehsold) & (DA_results["SpatialFDR"] < FDRthreshold)
    DAcells = adata_no_knn.obs_names[(nhoods[:,Neighbmask].sum(axis = 1) > 0).A1].tolist()
    adataComp.obs["DiffAbundant."+contrast] = 0
    adataComp.obs.loc[DAcells, "DiffAbundant."+contrast] = 1  
    adataComp.obs["DiffAbundant."+contrast] = adataComp.obs["DiffAbundant."+contrast].astype("str")
    
sc.pl.draw_graph(adataComp, color = ["DiffAbundant."+col.split(".")[1] for col in  DA_results.columns if "logFC" in col]+["leidenAnnotated"],ncols=2, 
           size = 10,add_outline=True, outline_width=(0.2, 0.05), vmin='p2',vmax='p98',  frameon=False, wspace=.3, legend_fontsize='xx-large')
In [57]:
import holoviews as hv
obsTupleToMap = ("leidenAnnotated","DiffAbundant.mid_vs_all")
SankeyDF=adataComp.obs[list(obsTupleToMap)]
SankeyDF.columns = ["leidenAnnotated","DiffAbundant.mid_vs_all"]
SankeyDF = SankeyDF.groupby(['leidenAnnotated','DiffAbundant.mid_vs_all']).size().reset_index().rename(columns={0:'count'})
SankeyDF=SankeyDF[SankeyDF["count"] != 0]
hv.extension('bokeh')




sankey1 = hv.Sankey(SankeyDF, kdims=["leidenAnnotated", "DiffAbundant.mid_vs_all"], vdims="count")



sankey1.opts(cmap='Colorblind',label_position='outer',
edge_color='leidenAnnotated', edge_line_width=0,
node_alpha=1.0, node_width=40, node_sort=True,
width=1100, height=700, bgcolor="white")
Out[57]:
In [58]:
obsTupleToMap = ("leidenAnnotated","DiffAbundant.late_vs_all")
SankeyDF=adataComp.obs[list(obsTupleToMap)]
SankeyDF.columns = ["leidenAnnotated","DiffAbundant.late_vs_all"]
SankeyDF = SankeyDF.groupby(['leidenAnnotated','DiffAbundant.late_vs_all']).size().reset_index().rename(columns={0:'count'})
SankeyDF=SankeyDF[SankeyDF["count"] != 0]
hv.extension('bokeh')




sankey1 = hv.Sankey(SankeyDF, kdims=["leidenAnnotated", "DiffAbundant.late_vs_all"], vdims="count")



sankey1.opts(cmap='Colorblind',label_position='outer',
edge_color='leidenAnnotated', edge_line_width=0,
node_alpha=1.0, node_width=40, node_sort=True,
width=1100, height=700, bgcolor="white")
Out[58]:
In [59]:
obsTupleToMap = ("leidenAnnotated","DiffAbundant.early_vs_all")
SankeyDF=adataComp.obs[list(obsTupleToMap)]
SankeyDF.columns = ["leidenAnnotated","DiffAbundant.early_vs_all"]
SankeyDF = SankeyDF.groupby(['leidenAnnotated','DiffAbundant.early_vs_all']).size().reset_index().rename(columns={0:'count'})
SankeyDF=SankeyDF[SankeyDF["count"] != 0]
hv.extension('bokeh')




sankey1 = hv.Sankey(SankeyDF, kdims=["leidenAnnotated", "DiffAbundant.early_vs_all"], vdims="count")



sankey1.opts(cmap='Colorblind',label_position='outer',
edge_color='leidenAnnotated', edge_line_width=0,
node_alpha=1.0, node_width=40, node_sort=True,
width=1100, height=700, bgcolor="white")
Out[59]:

Heatmap¶

In [60]:
sc.pl.correlation_matrix(adataComp, 'leidenAnnotated', figsize=(10,10), dendrogram=True)
WARNING: dendrogram data not found (using key=dendrogram_leidenAnnotated). Running `sc.tl.dendrogram` with default parameters. For fine tuning it is recommended to run `sc.tl.dendrogram` independently.
    using 'X_pca' with n_pcs = 50
Storing dendrogram info using `.uns['dendrogram_leidenAnnotated']`
In [61]:
adataCompCellXgene = adataComp.raw.to_adata().copy()
In [62]:
adataCompCellXgene.write_h5ad(outdir+"/adatas/adataComp_cellXgene.h5ad")
In [63]:
adataComp.raw.to_adata().X
Out[63]:
<14913x33538 sparse matrix of type '<class 'numpy.float32'>'
	with 31521790 stored elements in Compressed Sparse Row format>
In [64]:
# Interneurons scatter

tmp = pd.DataFrame(adataComp[adataComp.obs["leidenAnnotated"] == "Interneurons_GAD2"].X.T, 
            index = adataComp[adataComp.obs["leidenAnnotated"] == "Interneurons_GAD2"].var_names,
        columns = adataComp[adataComp.obs["leidenAnnotated"] == "Interneurons_GAD2"].obs_names).loc[["DLX2","DLX5","DLX6-AS1","GAD1","GAD2","SLC32A1","SLC17A7","SLC17A6","SLC17A8","NNAT","GRIN1","GRIN2B","GRID2","GRIA1","GRIA2","SLC2A1","NOVA1", "CUX1"]].T
In [65]:
sns.scatterplot(data=tmp, x="GAD1", y="CUX1")
Out[65]:
<AxesSubplot:xlabel='GAD1', ylabel='CUX1'>
In [ ]: